library(tidyverse)
## -- Attaching packages ------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the genotype table: a tab-separated text file with a header row
# (columns rsid, chromosome, position, genotype), then preview the first rows.
SNPs <- read.table(
  file = "data/23andMe_complete.txt",
  sep = "\t",
  header = TRUE
)
head(SNPs, 10)
## rsid chromosome position genotype
## 1 rs4477212 1 82154 AA
## 2 rs3094315 1 752566 AA
## 3 rs3131972 1 752721 GG
## 4 rs12124819 1 776546 AG
## 5 rs11240777 1 798959 AG
## 6 rs6681049 1 800007 CC
## 7 rs4970383 1 838555 AC
## 8 rs4475691 1 846808 CT
## 9 rs7537756 1 854250 AG
## 10 rs13302982 1 861808 GG
# Turn `chromosome` into an ordered factor with the levels 1-22, X, Y, MT so
# that legends and axes follow that order rather than the default sorted one.
# Values that are not among the listed levels become NA.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Polar bar chart: one wedge per genotype, filled by chromosome.
# (This chunk previously appeared twice in a row, verbatim; the conversion and
# the plot are now done once.)
ggplot(data = SNPs) +
  geom_bar(mapping = aes(x = genotype, fill = chromosome)) +
  coord_polar() +
  ggtitle("Total SNPs for each genotype") +
  ylab("Total number of SNPs") +
  xlab("Genotype")
# Write the per-chromosome bar chart (stacked by genotype) to a 6 x 3 inch PDF.
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and top-level auto-printing does not happen when the script is run
# through source(), which would leave the file without a plot.
pdf("SNP_example_plot.pdf", width = 6, height = 3)
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
# Close the device so the file is flushed to disk.
dev.off()
## png
## 2
# Write the same bar chart to a PNG: 6 x 6 inches at `ppi` pixels per inch
# (width and height are given to png() in pixels, hence the multiplication).
ppi <- 300
png("SNP_example_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# Explicit print(): top-level auto-printing is not available under source(),
# and without it nothing would be drawn on the device.
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
# Close the device so the file is flushed to disk.
dev.off()
## png
## 2
# Install plotly only when it is not already available. An unconditional
# install.packages() call re-downloads the package on every run and fails in
# non-interactive sessions that have no CRAN mirror configured.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatter plot of sepal length against sepal width for the built-in iris data,
# coloured by species, then converted to an interactive plotly widget.
p <- ggplot(iris, aes(Sepal.Length, Sepal.Width, colour = Species)) +
  geom_point()
ggplotly(p)
# Same interactive iris scatter plot, built and converted in one expression
# without storing the intermediate ggplot object.
library(plotly)
(
  ggplot(iris, aes(Sepal.Length, Sepal.Width, colour = Species)) +
    geom_point()
) %>%
  ggplotly()
# Install DT only when it is not already available. An unconditional
# install.packages() call re-downloads the package on every run and fails in
# non-interactive sessions that have no CRAN mirror configured.
if (!requireNamespace("DT", quietly = TRUE)) {
  install.packages("DT")
}
library(DT)

# Render the built-in iris data as an interactive table widget.
datatable(iris)
# Count the rows (SNPs) per chromosome level and show the counts as a
# one-column data frame. The column name in the output is generated from the
# text of the expression passed to data.frame(), so rewording the expression
# changes that name.
data.frame(summary(SNPs$chromosome))
## summary.SNPs.chromosome.
## 1 76909
## 2 77346
## 3 63285
## 4 55017
## 5 56019
## 6 63245
## 7 50965
## 8 49215
## 9 42969
## 10 50322
## 11 47972
## 12 47125
## 13 36078
## 14 30818
## 15 28400
## 16 30167
## 17 26688
## 18 27971
## 19 18533
## 20 23834
## 21 13404
## 22 14100
## X 26007
## Y 1766
## MT 2459
# Hand-entered per-chromosome SNP totals, one row per chromosome label.
# Both columns are deliberately kept as character vectors
# (stringsAsFactors = FALSE, counts written as quoted strings), which is what
# the summary() and str() output below reports.
df <- data.frame(
  chromosome = c(as.character(1:22), "X", "Y", "MT"),
  SNPs = c(
    "76909", "77346", "63285", "55017", "56019",
    "63245", "50965", "49215", "42969", "50322",
    "47972", "47125", "36078", "30818", "28400",
    "30167", "26688", "27971", "18533", "23834",
    "13404", "14100", "26007", "1766", "2459"
  ),
  stringsAsFactors = FALSE
)
summary(df)
## chromosome SNPs
## Length:25 Length:25
## Class :character Class :character
## Mode :character Mode :character
str(df)
## 'data.frame': 25 obs. of 2 variables:
## $ chromosome: chr "1" "2" "3" "4" ...
## $ SNPs : chr "76909" "77346" "63285" "55017" ...
# Bar chart of the SNP total per chromosome.
# `df` holds both columns as character vectors, so they are converted inside
# the mapping:
#   * SNPs -> numeric, otherwise the y axis is discrete and the bar heights do
#     not represent the counts;
#   * chromosome -> factor whose levels follow the row order of `df`,
#     otherwise the x axis is sorted as text ("1", "10", "11", ...).
# labs() restores the plain axis titles that the converted expressions would
# otherwise replace.
ab <- ggplot(data = df) +
  geom_col(
    mapping = aes(
      x = factor(chromosome, levels = unique(chromosome)),
      y = as.numeric(SNPs)
    ),
    fill = "blue"
  ) +
  labs(x = "chromosome", y = "SNPs")
ab
ab + ggtitle("SNPs in the human genome")
# Manual fill palette keyed by genotype code, grouped by the shape of the code.
mycolour <- c(
  # two-letter codes with two different letters
  setNames(rep("BROWN", 6), c("AC", "AG", "AT", "CG", "CT", "GT")),
  # two-letter codes with the same letter twice
  setNames(rep("BLUE", 4), c("AA", "CC", "GG", "TT")),
  # single-letter codes
  setNames(rep("GREEN", 4), c("A", "C", "G", "T")),
  # D / I codes
  setNames(rep("MAGENTA", 5), c("D", "DD", "DI", "I", "II")),
  # the "--" code gets a neutral grey
  "--" = "#999999"
)
# (Re)apply the chromosome ordering 1-22, X, Y, MT; doing this on an already
# ordered column with the same levels leaves it unchanged.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Stacked bar chart: number of SNPs per chromosome, filled by genotype, with a
# black outline around every segment.
# The x aesthetic refers to the column by its bare name. Writing
# `SNPs$chromosome` inside aes() reads the vector from the global data frame
# instead of the layer data, which ggplot2 discourages and which gives wrong
# results as soon as the data is faceted or subset.
p <- ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  ggtitle("Number of SNPs in the human genome") +
  ylab("total number of SNPs") +
  xlab("chromosome number")

# Apply the manual genotype palette defined in `mycolour`.
p + scale_fill_manual(values = mycolour)
# Genotype counts per chromosome
# Keep chromosomes in the order 1-22, X, Y, MT.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(as.character(1:22), "X", "Y", "MT")
)

# One panel per genotype, stacked in a single column; within each panel a bar
# per chromosome, filled by chromosome and outlined in black.
ad <- ggplot(data = SNPs) +
  geom_bar(
    aes(chromosome, fill = chromosome),
    position = "dodge",
    colour = "black"
  ) +
  facet_wrap(~genotype, ncol = 1)

# Enlarge the x-axis title and the y-axis tick labels.
ae <- ad +
  theme(
    axis.title.x = element_text(size = 20),
    axis.text.y = element_text(size = 20)
  )
ae + ggtitle("Genotype count per chromosome")
# Interactive version of the per-genotype panels: bars per chromosome filled
# by genotype, laid out in two columns of facets, converted with ggplotly().
library(plotly)
ggplotly(
  ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
    geom_bar(position = "dodge") +
    facet_wrap(~genotype, ncol = 2) +
    labs(
      title = "genotypes across chromosome",
      y = "genotype counts",
      x = "chromosome"
    )
)
# Keep only the rows whose chromosome is "Y" (original row names are
# retained), then preview the first ten of them.
# which() drops rows where the comparison is NA, as subset() does.
Chromosome_Y <- SNPs[which(SNPs$chromosome == "Y"), ]
head(Chromosome_Y, 10)
## rsid chromosome position genotype
## 956390 i4000095 Y 2649694 T
## 956391 rs11575897 Y 2655180 G
## 956392 rs2534636 Y 2657176 C
## 956393 i3000043 Y 2658271 G
## 956394 i3000045 Y 2658869 G
## 956395 i4000162 Y 2663707 T
## 956396 rs13303871 Y 2679100 G
## 956397 rs35284970 Y 2734854 C
## 956398 i4000052 Y 2740274 T
## 956399 rs3895 Y 2744628 T